{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 准备工作\n",
    "from requests_html import HTMLSession\n",
    "import requests_html\n",
    "import pandas as pd\n",
    "import urllib.parse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Landing (first) listing page of each news section on www.nfu.edu.cn, in order:\n",
    "# school headlines, campus updates, notices, tenders, higher-education news.\n",
    "(学校要闻url, 校园动态url, 通知公告url, 招投标url, 高教动态url) = (\n",
    "    'https://www.nfu.edu.cn/xxyw/index.htm',\n",
    "    'https://www.nfu.edu.cn/xydt/index.htm',\n",
    "    'https://www.nfu.edu.cn/tzgg/index.htm',\n",
    "    'https://www.nfu.edu.cn/ztb/index.htm',\n",
    "    'https://www.nfu.edu.cn/gjdt/index.htm',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Single HTMLSession instance; the page-probing cells below send their GET requests through it.\n",
    "session = HTMLSession()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "91\n"
     ]
    }
   ],
   "source": [
    "# School headlines (xxyw): request index1.htm, index2.htm, ... in order and print the\n",
    "# first page number whose response status is not 200.\n",
    "for i in range(1,100):\n",
    "    r = session.get('https://www.nfu.edu.cn/xxyw/index'+str(i)+'.htm')\n",
    "    if r.status_code != 200:\n",
    "        print(i)\n",
    "        break\n",
    "else:\n",
    "    # The loop ran out without a break: every probed page answered 200, so the real\n",
    "    # last page lies beyond the probe limit. Report it instead of finishing silently.\n",
    "    print('xxyw: no non-200 page among indexes 1-99; raise the probe limit')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/xxyw/index1.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index2.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index3.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index4.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index5.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index6.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index7.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index8.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index9.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index10.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index11.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index12.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index13.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index14.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index15.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index16.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index17.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index18.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index19.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index20.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index21.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index22.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index23.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index24.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index25.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index26.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index27.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index28.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index29.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index30.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index31.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index32.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index33.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index34.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index35.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index36.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index37.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index38.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index39.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index40.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index41.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index42.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index43.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index44.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index45.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index46.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index47.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index48.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index49.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index50.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index51.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index52.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index53.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index54.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index55.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index56.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index57.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index58.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index59.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index60.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index61.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index62.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index63.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index64.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index65.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index66.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index67.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index68.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index69.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index70.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index71.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index72.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index73.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index74.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index75.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index76.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index77.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index78.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index79.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index80.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index81.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index82.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index83.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index84.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index85.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index86.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index87.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index88.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index89.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index90.htm']"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Paginated school-headlines listing URLs index1.htm .. index90.htm\n",
    "# (the landing page index.htm is not part of this list).\n",
    "xxyw_url_group = list(map(\n",
    "    lambda page_no: 'https://www.nfu.edu.cn/xxyw/index' + str(page_no) + '.htm',\n",
    "    range(1, 91),\n",
    "))\n",
    "xxyw_url_group"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/xxyw/index.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index1.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index2.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index3.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index4.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index5.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index6.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index7.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index8.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index9.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index10.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index11.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index12.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index13.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index14.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index15.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index16.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index17.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index18.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index19.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index20.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index21.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index22.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index23.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index24.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index25.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index26.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index27.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index28.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index29.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index30.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index31.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index32.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index33.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index34.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index35.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index36.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index37.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index38.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index39.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index40.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index41.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index42.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index43.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index44.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index45.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index46.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index47.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index48.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index49.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index50.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index51.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index52.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index53.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index54.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index55.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index56.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index57.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index58.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index59.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index60.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index61.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index62.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index63.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index64.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index65.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index66.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index67.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index68.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index69.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index70.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index71.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index72.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index73.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index74.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index75.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index76.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index77.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index78.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index79.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index80.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index81.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index82.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index83.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index84.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index85.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index86.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index87.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index88.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index89.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index90.htm']"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Put the section's landing page in front of the paginated URLs.\n",
    "# Guarded so that re-running this cell does not prepend a second copy of index.htm;\n",
    "# the slice comparison also works when the list is empty.\n",
    "if xxyw_url_group[:1] != ['https://www.nfu.edu.cn/xxyw/index.htm']:\n",
    "    xxyw_url_group.insert(0,'https://www.nfu.edu.cn/xxyw/index.htm')\n",
    "xxyw_url_group"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/xydt/index.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index1.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index2.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index3.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index4.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index5.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index6.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index7.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index8.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index9.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index10.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index11.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index12.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index13.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index14.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index15.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index16.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index17.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index18.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index19.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index20.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index21.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index22.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index23.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index24.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index25.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index26.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index27.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index28.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index29.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index30.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index31.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index32.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index33.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index34.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index35.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index36.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index37.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index38.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index39.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index40.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index41.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index42.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index43.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index44.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index45.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index46.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index47.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index48.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index49.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index50.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index51.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index52.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index53.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index54.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index55.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index56.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index57.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index58.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index59.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index60.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index61.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index62.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index63.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index64.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index65.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index66.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index67.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index68.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index69.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index70.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index71.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index72.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index73.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index74.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index75.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index76.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index77.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index78.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index79.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index80.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index81.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index82.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index83.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index84.htm']"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Full campus-updates (xydt) URL list: landing page first, then index1.htm .. index84.htm.\n",
    "# Built from scratch here because no earlier cell defines xydt_url_group (a bare insert\n",
    "# raises NameError on a fresh kernel) and because repeating insert(0, ...) stacked\n",
    "# duplicate index.htm entries at the head of the list.\n",
    "# NOTE(review): the xydt probe and list-building cells come after this one and reassign\n",
    "# xydt_url_group without index.htm -- consider moving this cell below them.\n",
    "xydt_url_group = ['https://www.nfu.edu.cn/xydt/index.htm'] + [\n",
    "    'https://www.nfu.edu.cn/xydt/index'+str(i)+'.htm' for i in range(1,85)\n",
    "]\n",
    "xydt_url_group"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "85\n"
     ]
    }
   ],
   "source": [
    "# 校园动态\n",
    "for i in range(1,100):\n",
    "    r = session.get('https://www.nfu.edu.cn/xydt/index'+str(i)+'.htm')\n",
    "    if r.status_code != 200:\n",
    "        print(i)\n",
    "        break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/xydt/index1.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index2.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index3.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index4.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index5.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index6.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index7.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index8.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index9.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index10.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index11.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index12.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index13.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index14.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index15.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index16.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index17.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index18.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index19.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index20.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index21.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index22.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index23.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index24.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index25.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index26.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index27.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index28.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index29.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index30.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index31.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index32.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index33.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index34.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index35.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index36.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index37.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index38.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index39.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index40.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index41.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index42.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index43.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index44.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index45.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index46.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index47.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index48.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index49.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index50.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index51.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index52.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index53.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index54.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index55.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index56.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index57.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index58.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index59.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index60.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index61.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index62.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index63.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index64.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index65.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index66.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index67.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index68.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index69.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index70.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index71.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index72.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index73.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index74.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index75.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index76.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index77.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index78.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index79.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index80.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index81.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index82.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index83.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index84.htm']"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Paginated campus-updates listing URLs index1.htm .. index84.htm\n",
    "# (the landing page index.htm is not part of this list).\n",
    "xydt_url_group = list(map(\n",
    "    lambda page_no: 'https://www.nfu.edu.cn/xydt/index' + str(page_no) + '.htm',\n",
    "    range(1, 85),\n",
    "))\n",
    "xydt_url_group"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "35\n"
     ]
    }
   ],
   "source": [
    "# 通知公告\n",
    "for i in range(1,100):\n",
    "    r = session.get('https://www.nfu.edu.cn/tzgg/index'+str(i)+'.htm')\n",
    "    if r.status_code != 200:\n",
    "        print(i)\n",
    "        break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/tzgg/index1.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index2.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index3.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index4.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index5.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index6.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index7.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index8.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index9.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index10.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index11.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index12.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index13.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index14.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index15.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index16.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index17.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index18.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index19.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index20.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index21.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index22.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index23.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index24.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index25.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index26.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index27.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index28.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index29.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index30.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index31.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index32.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index33.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index34.htm']"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Paginated notices listing URLs index1.htm .. index34.htm\n",
    "# (the landing page index.htm is not part of this list).\n",
    "tzgg_url_group = list(map(\n",
    "    lambda page_no: 'https://www.nfu.edu.cn/tzgg/index' + str(page_no) + '.htm',\n",
    "    range(1, 35),\n",
    "))\n",
    "tzgg_url_group"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/tzgg/index.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index1.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index2.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index3.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index4.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index5.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index6.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index7.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index8.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index9.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index10.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index11.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index12.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index13.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index14.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index15.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index16.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index17.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index18.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index19.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index20.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index21.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index22.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index23.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index24.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index25.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index26.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index27.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index28.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index29.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index30.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index31.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index32.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index33.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index34.htm']"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Put the section's landing page in front of the paginated URLs.\n",
    "# Guarded so that re-running this cell does not prepend a second copy of index.htm;\n",
    "# the slice comparison also works when the list is empty.\n",
    "if tzgg_url_group[:1] != ['https://www.nfu.edu.cn/tzgg/index.htm']:\n",
    "    tzgg_url_group.insert(0,'https://www.nfu.edu.cn/tzgg/index.htm')\n",
    "tzgg_url_group"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22\n"
     ]
    }
   ],
   "source": [
    "# 招投标\n",
    "for i in range(1,100):\n",
    "    r = session.get('https://www.nfu.edu.cn/ztb/index'+str(i)+'.htm')\n",
    "    if r.status_code != 200:\n",
    "        print(i)\n",
    "        break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/ztb/index1.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index2.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index3.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index4.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index5.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index6.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index7.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index8.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index9.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index10.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index11.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index12.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index13.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index14.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index15.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index16.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index17.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index18.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index19.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index20.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index21.htm']"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Paginated tenders listing URLs index1.htm .. index21.htm\n",
    "# (the landing page index.htm is not part of this list).\n",
    "ztb_url_group = list(map(\n",
    "    lambda page_no: 'https://www.nfu.edu.cn/ztb/index' + str(page_no) + '.htm',\n",
    "    range(1, 22),\n",
    "))\n",
    "ztb_url_group"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/ztb/index.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index1.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index2.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index3.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index4.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index5.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index6.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index7.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index8.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index9.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index10.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index11.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index12.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index13.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index14.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index15.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index16.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index17.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index18.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index19.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index20.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index21.htm']"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Put the section's landing page in front of the paginated URLs.\n",
    "# Guarded so that re-running this cell does not prepend a second copy of index.htm;\n",
    "# the slice comparison also works when the list is empty.\n",
    "if ztb_url_group[:1] != ['https://www.nfu.edu.cn/ztb/index.htm']:\n",
    "    ztb_url_group.insert(0,'https://www.nfu.edu.cn/ztb/index.htm')\n",
    "ztb_url_group"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "26\n"
     ]
    }
   ],
   "source": [
    "# Higher-education news (gjdt): request the numbered listing pages in order and\n",
    "# print the first page number whose response status is not HTTP 200.\n",
    "for i in range(1, 100):\n",
    "    r = session.get(f'https://www.nfu.edu.cn/gjdt/index{i}.htm')\n",
    "    if r.status_code == 200:\n",
    "        continue\n",
    "    print(i)\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/gjdt/index1.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index2.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index3.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index4.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index5.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index6.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index7.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index8.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index9.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index10.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index11.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index12.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index13.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index14.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index15.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index16.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index17.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index18.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index19.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index20.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index21.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index22.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index23.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index24.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index25.htm']"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gjdt_url_group = ['https://www.nfu.edu.cn/gjdt/index'+str(i)+'.htm' for i in range(1,26)]\n",
    "gjdt_url_group"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/gjdt/index.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index1.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index2.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index3.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index4.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index5.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index6.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index7.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index8.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index9.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index10.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index11.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index12.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index13.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index14.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index15.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index16.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index17.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index18.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index19.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index20.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index21.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index22.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index23.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index24.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index25.htm']"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Prepend the section's first page (index.htm, which carries no page number).\n",
    "# The membership check keeps this cell idempotent: re-running it does not\n",
    "# insert a duplicate entry at the front of the list.\n",
    "gjdt_first_page = 'https://www.nfu.edu.cn/gjdt/index.htm'\n",
    "if gjdt_first_page not in gjdt_url_group:\n",
    "    gjdt_url_group.insert(0, gjdt_first_page)\n",
    "gjdt_url_group"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ParseResult(scheme='https', netloc='www.nfu.edu.cn', path='/xxyw/index.htm', params='', query='', fragment='')"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Saving the HTML files: first inspect how a section URL splits into components.\n",
    "# 学校要闻url is the name assigned in the URL-definition cell near the top of the\n",
    "# notebook, so this cell runs on a fresh kernel.\n",
    "# The result is stored as parsed_url rather than under the function's own name.\n",
    "parsed_url = urllib.parse.urlparse(学校要闻url)\n",
    "parsed_url"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/xxyw/index.htm'"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Path component of the first school-news (xxyw) listing URL.\n",
    "first_xxyw_url = xxyw_url_group[0]\n",
    "urllib.parse.urlparse(first_xxyw_url).path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: 'html_out//xxyw/index.htm'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-77-bb06d6779dd9>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      2\u001b[0m     \u001b[0mr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msession\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m     \u001b[0mpath\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0murllib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0murlparse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m     \u001b[1;32mwith\u001b[0m \u001b[0mopen\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;34m'html_out/'\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mencoding\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"utf8\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"w\"\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mfp\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      5\u001b[0m         \u001b[0mfp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhtml\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhtml\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'html_out//xxyw/index.htm'"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# Download every school-news (xxyw) listing page and store it under html_out/.\n",
    "for url in xxyw_url_group:\n",
    "    r = session.get(url)\n",
    "    # The URL path starts with '/'; strip it so the target is html_out/xxyw/<page>.htm\n",
    "    # rather than the malformed 'html_out//xxyw/...'.\n",
    "    path = urllib.parse.urlparse(url).path.lstrip('/')\n",
    "    out_file = os.path.join('html_out', path)\n",
    "    # open() does not create missing parent directories (the cause of the\n",
    "    # FileNotFoundError), so make sure the section folder exists first.\n",
    "    os.makedirs(os.path.dirname(out_file), exist_ok=True)\n",
    "    with open(out_file, encoding='utf8', mode='w') as fp:\n",
    "        fp.write(r.html.html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: 'html_out//xydt/index1.htm'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-45-14c309ad9f12>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      2\u001b[0m     \u001b[0mr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msession\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m     \u001b[0mpath\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0murllib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0murlparse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m     \u001b[1;32mwith\u001b[0m \u001b[0mopen\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;34m'html_out/'\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mencoding\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"utf8\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"w\"\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mfp\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      5\u001b[0m         \u001b[0mfp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhtml\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhtml\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'html_out//xydt/index1.htm'"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# Download every campus-news (xydt) listing page and store it under html_out/.\n",
    "for url in xydt_url_group:\n",
    "    r = session.get(url)\n",
    "    # The URL path starts with '/'; strip it so the target is html_out/xydt/<page>.htm\n",
    "    # rather than the malformed 'html_out//xydt/...'.\n",
    "    path = urllib.parse.urlparse(url).path.lstrip('/')\n",
    "    out_file = os.path.join('html_out', path)\n",
    "    # open() does not create missing parent directories (the cause of the\n",
    "    # FileNotFoundError), so make sure the section folder exists first.\n",
    "    os.makedirs(os.path.dirname(out_file), exist_ok=True)\n",
    "    with open(out_file, encoding='utf8', mode='w') as fp:\n",
    "        fp.write(r.html.html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: 'html_out//tzgg/index.htm'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-46-8df585841645>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      2\u001b[0m     \u001b[0mr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msession\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m     \u001b[0mpath\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0murllib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0murlparse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m     \u001b[1;32mwith\u001b[0m \u001b[0mopen\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;34m'html_out/'\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mencoding\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"utf8\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"w\"\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mfp\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      5\u001b[0m         \u001b[0mfp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhtml\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhtml\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'html_out//tzgg/index.htm'"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# Download every notice/announcement (tzgg) listing page and store it under html_out/.\n",
    "for url in tzgg_url_group:\n",
    "    r = session.get(url)\n",
    "    # The URL path starts with '/'; strip it so the target is html_out/tzgg/<page>.htm\n",
    "    # rather than the malformed 'html_out//tzgg/...'.\n",
    "    path = urllib.parse.urlparse(url).path.lstrip('/')\n",
    "    out_file = os.path.join('html_out', path)\n",
    "    # open() does not create missing parent directories (the cause of the\n",
    "    # FileNotFoundError), so make sure the section folder exists first.\n",
    "    os.makedirs(os.path.dirname(out_file), exist_ok=True)\n",
    "    with open(out_file, encoding='utf8', mode='w') as fp:\n",
    "        fp.write(r.html.html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: 'html_out//ztb/index1.htm'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-47-e68488f07b47>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      2\u001b[0m     \u001b[0mr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msession\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m     \u001b[0mpath\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0murllib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0murlparse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m     \u001b[1;32mwith\u001b[0m \u001b[0mopen\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;34m'html_out/'\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mencoding\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"utf8\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"w\"\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mfp\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      5\u001b[0m         \u001b[0mfp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhtml\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhtml\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'html_out//ztb/index1.htm'"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# Download every bidding (ztb) listing page and store it under html_out/.\n",
    "for url in ztb_url_group:\n",
    "    r = session.get(url)\n",
    "    # The URL path starts with '/'; strip it so the target is html_out/ztb/<page>.htm\n",
    "    # rather than the malformed 'html_out//ztb/...'.\n",
    "    path = urllib.parse.urlparse(url).path.lstrip('/')\n",
    "    out_file = os.path.join('html_out', path)\n",
    "    # open() does not create missing parent directories (the cause of the\n",
    "    # FileNotFoundError), so make sure the section folder exists first.\n",
    "    os.makedirs(os.path.dirname(out_file), exist_ok=True)\n",
    "    with open(out_file, encoding='utf8', mode='w') as fp:\n",
    "        fp.write(r.html.html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: 'html_out//gjdt/index.htm'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-48-215f6db48651>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      2\u001b[0m     \u001b[0mr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msession\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m     \u001b[0mpath\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0murllib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0murlparse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m     \u001b[1;32mwith\u001b[0m \u001b[0mopen\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;34m'html_out/'\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mencoding\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"utf8\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"w\"\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mfp\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      5\u001b[0m         \u001b[0mfp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhtml\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhtml\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'html_out//gjdt/index.htm'"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# Download every higher-education news (gjdt) listing page and store it under html_out/.\n",
    "for url in gjdt_url_group:\n",
    "    r = session.get(url)\n",
    "    # The URL path starts with '/'; strip it so the target is html_out/gjdt/<page>.htm\n",
    "    # rather than the malformed 'html_out//gjdt/...'.\n",
    "    path = urllib.parse.urlparse(url).path.lstrip('/')\n",
    "    out_file = os.path.join('html_out', path)\n",
    "    # open() does not create missing parent directories (the cause of the\n",
    "    # FileNotFoundError), so make sure the section folder exists first.\n",
    "    os.makedirs(os.path.dirname(out_file), exist_ok=True)\n",
    "    with open(out_file, encoding='utf8', mode='w') as fp:\n",
    "        fp.write(r.html.html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "[WinError 3] 系统找不到指定的路径。: 'html_out/xxyw/'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-49-6b5d4db161ec>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;31m# 存excel文件\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mos\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mfiles\u001b[0m\u001b[1;33m=\u001b[0m \u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlistdir\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'html_out/xxyw/'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      4\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfiles\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mFileNotFoundError\u001b[0m: [WinError 3] 系统找不到指定的路径。: 'html_out/xxyw/'"
     ]
    }
   ],
   "source": [
    "# Save to an Excel file: start by listing the contents of html_out/xxyw/.\n",
    "import os\n",
    "xxyw_html_dir = 'html_out/xxyw/'\n",
    "files = os.listdir(xxyw_html_dir)\n",
    "print(files)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
